# Make new rational class
setClass("rational",
slots = list(
a = "numeric",
b = "numeric"
),
validity = function(object) { # Validator
if (is.null(object@a)) {
stop("Rational must have a numerator")
}
if (object@b == 0) {
return("Denominator cannot be zero.")
}
return(TRUE)
}
)Stats 506 PS5
Github repo link
https://github.com/alyssawyang/stats506ps5
Problem 1: OOP Programming
1a
# Constructor
rational <- function(a, b = 1) {
new("rational", a = a, b = b)
}# Show method
setMethod("show", "rational",
function(object) {
if (object@b == 1) {
print(object@a)
}
else if (object@a == 0) {
print(0)
}
else {
cat(paste0(object@a, "/", object@b, "\n"))
}
return(invisible(object))
}
)# GCD and LCM in RCpp
library(Rcpp)
cppFunction("
#include <numeric>`
int C_gcd(int a, int b) {
return std::gcd(a, b);
}")
cppFunction("
#include <numeric>
int C_lcm(int a, int b) {
return std::lcm(a, b);
}")# Simplify method
setGeneric("simplify",
function(object, ...) {
standardGeneric("simplify")
})[1] "simplify"
setMethod("simplify", "rational",
function(object, print_result = TRUE) {
gcd <- C_gcd(object@a, object@b)
object@a <- object@a / gcd
object@b <- object@b / gcd
if (print_result) {
show(object)
}
return(invisible(object))
})# Quotient method
setGeneric("quotient",
function(object, digits = 4) {
standardGeneric("quotient")
})[1] "quotient"
setMethod("quotient", "rational",
function(object, digits = 4) {
if (!is.numeric(digits) || digits != as.integer(digits) || digits < 0) {
stop("Digits must be a non-negative integer.")
}
result <- object@a / object@b
print(sprintf(paste0("%.", digits, "f"), result))
return(invisible(result))
})# +, -, *, /
setMethod("+", signature(e1 = "rational",
e2 = "rational"),
function(e1, e2) {
lcm <- C_lcm(e1@b, e2@b)
num1 <- (lcm / e1@b) * e1@a
num2 <- (lcm / e2@b) * e2@a
return(simplify(rational(a = num1 + num2, b = lcm), FALSE))
})
setMethod("-", signature(e1 = "rational",
e2 = "rational"),
function(e1, e2) {
lcm <- C_lcm(e1@b, e2@b)
num1 <- (lcm / e1@b) * e1@a
num2 <- (lcm / e2@b) * e2@a
return(simplify(rational(a = num1 - num2, b = lcm), FALSE))
})
setMethod("*", signature(e1 = "rational",
e2 = "rational"),
function(e1, e2) {
return(simplify(rational(a = e1@a * e2@a, b = e1@b * e2@b), FALSE))
})
setMethod("/", signature(e1 = "rational",
e2 = "rational"),
function(e1, e2) {
return(simplify(rational(a = e1@a * e2@b, b = e1@b * e2@a), FALSE))
})1b
r1 <- rational(a = 24, b = 6)
r2 <- rational(a = 7, b = 230)
r3 <- rational(a = 0, b = 4)r124/6
r3[1] 0
r1 + r2927/230
r1 - r2913/230
r1 * r214/115
r1 / r2920/7
r1 + r3[1] 4
r1 * r3[1] 0
r2 / r3Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'simplify': invalid class "rational" object: Denominator cannot be zero.
quotient(r1)[1] "4.0000"
quotient(r2)[1] "0.0304"
quotient(r2, digits = 3)[1] "0.030"
quotient(r2, digits = 3.14)Error in quotient(r2, digits = 3.14): Digits must be a non-negative integer.
quotient(r2, digits = "avocado")Error in quotient(r2, digits = "avocado"): Digits must be a non-negative integer.
q2 <- quotient(r2, digits = 3)[1] "0.030"
q2[1] 0.03043478
quotient(r3)[1] "0.0000"
simplify(r1)[1] 4
simplify(r2)7/230
simplify(r3)[1] 0
1c
# Check for no creation of 0's in denominator
t1 <- rational(a = 2, b = 0)Error in validObject(.Object): invalid class "rational" object: Denominator cannot be zero.
t2 <- rational(a = 0, b = 0)Error in validObject(.Object): invalid class "rational" object: Denominator cannot be zero.
t3 <- rational(a = 1, b = 2)
t4 <- rational(a = 0, b = 4)
t5 <- t3 / t4Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'simplify': invalid class "rational" object: Denominator cannot be zero.
# Check for other malformed inputs
t6 <- rational(a = "a", b = "b")Error in validObject(.Object): invalid class "rational" object: 1: invalid object for slot "a" in class "rational": got class "character", should be or extend class "numeric"
invalid class "rational" object: 2: invalid object for slot "b" in class "rational": got class "character", should be or extend class "numeric"
t7 <- rational(a = "3", b = "4")Error in validObject(.Object): invalid class "rational" object: 1: invalid object for slot "a" in class "rational": got class "character", should be or extend class "numeric"
invalid class "rational" object: 2: invalid object for slot "b" in class "rational": got class "character", should be or extend class "numeric"
t9 <- rational(a = 2)
t10 <- rational(b = 2)Error in rational(b = 2): argument "a" is missing, with no default
t11 <- rational()Error in rational(): argument "a" is missing, with no default
Problem 2: plotly
2a
Code
suppressMessages(library(dplyr))
library(tidyr)
library(stringr)
library(ggplot2)
suppressMessages(library(plotly))
art_sales <- read.csv("df_for_ml_improved_new_market.csv")Code
# Collapse genre columns into one column
art_sales_genres <- art_sales %>%
pivot_longer(cols = starts_with("Genre___"),
names_to = "genre",
values_to = "count")
# Rename genre values and select relevant columns
art_sales_genres <- art_sales_genres %>%
mutate(genre = str_remove(genre, "Genre___")) %>%
filter(count > 0) %>%
select(year, genre)
# Calculate proportions of genre of sales across years
genre_dist <- art_sales_genres %>%
group_by(year, genre) %>%
summarize(num_sales = n()) %>%
mutate(proportion = num_sales / sum(num_sales))`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
Code
# Plot genre distribution across years
ggplot(genre_dist, aes(x = year, y = proportion, fill = genre)) +
geom_area(alpha = 0.75, size = 0.5, color = "white") +
labs(title = "Distribution of Genre of Sales Across Years",
x = "Year",
y = "Proportion of Sales")Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
2b
Code
# Collapse genre columns into one column
art_sales_genres <- art_sales %>%
pivot_longer(cols = starts_with("Genre___"),
names_to = "genre",
values_to = "count")
# Rename genre values and select relevant columns
art_sales_genres <- art_sales_genres %>%
mutate(genre = str_remove(genre, "Genre___")) %>%
filter(count > 0) %>%
select(year, genre, price_usd)
# Calculate average sales price by year and genre
avg_price_genre <- art_sales_genres %>%
group_by(year, genre) %>%
summarize(avg_sales_price = mean(price_usd, na.rm = TRUE))`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
Code
# Filter genre dataframe for each genre to plot
others <- avg_price_genre %>%
filter(genre == "Others")
painting <- avg_price_genre %>%
filter(genre == "Painting")
photography <- avg_price_genre %>%
filter(genre == "Photography")
print <- avg_price_genre %>%
filter(genre == "Print")
sculpture <- avg_price_genre %>%
filter(genre == "Sculpture")Code
library(plotly)
# Create the plot with both sales price and genre average lines
p <- plot_ly() |>
add_markers(data = art_sales,
x = ~year, y = ~price_usd,
name = "Sales Price",
marker = list(opacity = 0.5, color = "gray")) |>
add_lines(data = art_sales,
x = ~year, y = ~fitted(loess(price_usd ~ year, data = art_sales)),
name = "Trend line",
line = list(color = "red")) |>
add_markers(data = others,
x = ~year, y = ~avg_sales_price,
name = "Others", mode = "lines",
line = list(color = "orange"), marker = list(opacity = 0)) |>
add_markers(data = painting,
x = ~year, y = ~avg_sales_price,
name = "Painting", mode = "lines",
line = list(color = "yellow"), marker = list(opacity = 0)) |>
add_markers(data = photography,
x = ~year, y = ~avg_sales_price,
name = "Photography", mode = "lines",
line = list(color = "green"), marker = list(opacity = 0)) |>
add_markers(data = print,
x = ~year, y = ~avg_sales_price,
name = "Print", mode = "lines",
line = list(color = "blue"), marker = list(opacity = 0)) |>
add_markers(data = sculpture,
x = ~year, y = ~avg_sales_price,
name = "Sculpture", mode = "lines",
line = list(color = "purple"), marker = list(opacity = 0)) |>
layout(
title = "Change in Sales Price in USD Over Time",
xaxis = list(title = "Year"),
yaxis = list(title = "Sales Price in USD", range = c(0, 100000)),
showlegend = TRUE
)Code
# Create menus to switch between overall trend and genres
p |> layout(updatemenus = list(
list(
y = 0.95,
buttons = list(
list(method = "update",
args = list(list(visible = list(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE))),
label = "All lines"),
list(method = "update",
args = list(list(visible = list(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE))),
label = "Overall trend"),
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE))),
label = "All genres"),
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE))),
label = "Others"),
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE))),
label = "Painting"),
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE))),
label = "Photography"),
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE))),
label = "Print"),
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE))),
label = "Sculpture")
)
)
))A line object has been specified, but lines is not in the mode
Adding lines to the mode...
A line object has been specified, but lines is not in the mode
Adding lines to the mode...
A line object has been specified, but lines is not in the mode
Adding lines to the mode...
A line object has been specified, but lines is not in the mode
Adding lines to the mode...
A line object has been specified, but lines is not in the mode
Adding lines to the mode...
Problem 3:
3a
suppressMessages(library(data.table))
library(nycflights13)
flights <- data.table(flights)
airports <- data.table(airports)
planes <- data.table(planes)# Departure delays
flights_dep <- merge(flights[, faa := origin],
airports,
by = "faa",
all.x = TRUE)
flights_dep[, .(mean_delay = mean(dep_delay, na.rm = TRUE),
median_delay = median(dep_delay, na.rm = TRUE),
num_flights = .N),
by = name] |>
_[num_flights >= 10] |>
_[,!"num_flights"] |>
_[order(mean_delay, decreasing = TRUE)] name mean_delay median_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
# Arrival delays
flights_arr <- merge(flights[, faa := dest],
airports,
by = "faa",
all.x = TRUE)
flights_arr[, .(name = ifelse(is.na(name[1]), faa[1], name[1]),
mean_delay = mean(arr_delay, na.rm = TRUE),
med_delay = median(arr_delay, na.rm = TRUE),
num_flights = .N),
by = faa] |>
_[num_flights >= 10] |>
_[, !c("faa", "num_flights")] |>
_[order(mean_delay, decreasing = TRUE)] |>
print(x = _, nrows = 110) name mean_delay med_delay
<char> <num> <num>
1: Columbia Metropolitan 41.76415094 28.0
2: Tulsa Intl 33.65986395 14.0
3: Will Rogers World 30.61904762 16.0
4: Jackson Hole Airport 28.09523810 15.0
5: Mc Ghee Tyson 24.06920415 2.0
6: Dane Co Rgnl Truax Fld 20.19604317 1.0
7: Richmond Intl 20.11125320 1.0
8: Akron Canton Regional Airport 19.69833729 3.0
9: Des Moines Intl 19.00573614 0.0
10: Gerald R Ford Intl 18.18956044 1.0
11: Birmingham Intl 16.87732342 -2.0
12: Theodore Francis Green State 16.23463687 1.0
13: Greenville-Spartanburg International 15.93544304 -0.5
14: Cincinnati Northern Kentucky Intl 15.36456376 -3.0
15: Savannah Hilton Head Intl 15.12950601 -1.0
16: Manchester Regional Airport 14.78755365 -3.0
17: Eppley Afld 14.69889841 -2.0
18: Yeager 14.67164179 -1.5
19: Kansas City Intl 14.51405836 0.0
20: Albany Intl 14.39712919 -4.0
21: General Mitchell Intl 14.16722038 0.0
22: Piedmont Triad 14.11260054 -2.0
23: Washington Dulles Intl 13.86420212 -3.0
24: Cherry Capital Airport 12.96842105 -10.0
25: James M Cox Dayton Intl 12.68048606 -3.0
26: Louisville International Airport 12.66938406 -2.0
27: Chicago Midway Intl 12.36422360 -1.0
28: Sacramento Intl 12.10992908 4.0
29: Jacksonville Intl 11.84483416 -2.0
30: Nashville Intl 11.81245891 -2.0
31: Portland Intl Jetport 11.66040210 -4.0
32: Greater Rochester Intl 11.56064461 -5.0
33: Hartsfield Jackson Atlanta Intl 11.30011285 -1.0
34: Lambert St Louis Intl 11.07846451 -3.0
35: Norfolk Intl 10.94909344 -4.0
36: Baltimore Washington Intl 10.72673385 -5.0
37: Memphis Intl 10.64531435 -2.5
38: Port Columbus Intl 10.60132291 -3.0
39: Charleston Afb Intl 10.59296847 -4.0
40: Philadelphia Intl 10.12719014 -3.0
41: Raleigh Durham Intl 10.05238095 -3.0
42: Indianapolis Intl 9.94043412 -3.0
43: Charlottesville-Albemarle 9.50000000 -5.0
44: Cleveland Hopkins Intl 9.18161129 -5.0
45: Ronald Reagan Washington Natl 9.06695204 -2.0
46: Burlington Intl 8.95099602 -4.0
47: Buffalo Niagara Intl 8.94595186 -5.0
48: Syracuse Hancock Intl 8.90392501 -5.0
49: Denver Intl 8.60650021 -2.0
50: Palm Beach Intl 8.56297210 -3.0
51: BQN 8.24549550 -1.0
52: Bob Hope 8.17567568 -3.0
53: Fort Lauderdale Hollywood Intl 8.08212154 -3.0
54: Bangor Intl 8.02793296 -9.0
55: Asheville Regional Airport 8.00383142 -1.0
56: PSE 7.87150838 0.0
57: Pittsburgh Intl 7.68099053 -5.0
58: Gallatin Field 7.60000000 -2.0
59: NW Arkansas Regional 7.46572581 -2.0
60: Tampa Intl 7.40852503 -4.0
61: Charlotte Douglas Intl 7.36031885 -3.0
62: Minneapolis St Paul Intl 7.27016886 -5.0
63: William P Hobby 7.17618819 -4.0
64: Bradley Intl 7.04854369 -10.0
65: San Antonio Intl 6.94537178 -9.0
66: South Bend Rgnl 6.50000000 -3.5
67: Louis Armstrong New Orleans Intl 6.49017497 -6.0
68: Key West Intl 6.35294118 7.0
69: Eagle Co Rgnl 6.30434783 -4.0
70: Austin Bergstrom Intl 6.01990875 -5.0
71: Chicago Ohare Intl 5.87661475 -8.0
72: Orlando Intl 5.45464309 -5.0
73: Detroit Metro Wayne Co 5.42996346 -7.0
74: Portland Intl 5.14157973 -5.0
75: Nantucket Mem 4.85227273 -3.0
76: Wilmington Intl 4.63551402 -7.0
77: Myrtle Beach Intl 4.60344828 -13.0
78: Albuquerque International Sunport 4.38188976 -5.5
79: George Bush Intercontinental 4.24079040 -5.0
80: Norman Y Mineta San Jose Intl 3.44817073 -7.0
81: Southwest Florida Intl 3.23814963 -5.0
82: San Diego Intl 3.13916574 -5.0
83: Sarasota Bradenton Intl 3.08243131 -5.0
84: Metropolitan Oakland Intl 3.07766990 -9.0
85: General Edward Lawrence Logan Intl 2.91439222 -9.0
86: San Francisco Intl 2.67289152 -8.0
87: SJU 2.52052659 -6.0
88: Yampa Valley 2.14285714 2.0
89: Phoenix Sky Harbor Intl 2.09704733 -6.0
90: Montrose Regional Airport 1.78571429 -10.5
91: Los Angeles Intl 0.54711094 -7.0
92: Dallas Fort Worth Intl 0.32212685 -9.0
93: Miami Intl 0.29905978 -9.0
94: Mc Carran Intl 0.25772849 -8.0
95: Salt Lake City Intl 0.17625459 -8.0
96: Long Beach -0.06202723 -10.0
97: Martha\\\\'s Vineyard -0.28571429 -11.0
98: Seattle Tacoma Intl -1.09909910 -11.0
99: Honolulu Intl -1.36519258 -7.0
100: STT -3.83590734 -9.0
101: John Wayne Arpt Orange Co -7.86822660 -11.0
102: Palm Springs Intl -12.72222222 -13.5
name mean_delay med_delay
3b
flights_planes <- merge(flights,
planes,
by = "tailnum",
all.x = TRUE)
flights_planes[, .(avg_mph = mean(distance/(air_time/60)),
num_flights = .N),
by = model] |>
_[order(avg_mph, decreasing = TRUE)] |>
_[head(1)] model avg_mph num_flights
<char> <num> <int>
1: 777-222 482.6254 4